#@title Hide code
!pip install geopandas
!apt-get -qq install python-cartopy python3-cartopy
!pip uninstall shapely
!pip install shapely --no-binary shapely
import cartopy
!pip3 install geoplot
import statsmodels.iolib.foreign as smio
from pandas import DataFrame
import pandas as pd
import geopandas as gpd
import altair as alt
import vega_datasets as data
import json
import warnings
import numpy as np
import matplotlib.pyplot as plt
import geoplot as gplt
import geoplot.crs as gcrs
warnings.filterwarnings('ignore')
#@Hide code
from google.colab import drive
drive.mount('/content/drive')
#@Hide code
cd "/content/drive/My Drive/Datavisualization"
urls = ["https://docs.google.com/spreadsheets/d/1sUA47Q8OrH18YI4q_DOP7OWIoHX2i8kH0fk5E4lFszE/export?format=csv&gid=1048618207",
"https://docs.google.com/spreadsheets/d/18KT-PC9IXKqYL-wd5zHHzYV4K8eZ9mgUe98XaFANMQM/export?format=csv&gid=2072191966",
"https://docs.google.com/spreadsheets/d/1p_fq_sG3nYnHdOkLVglxXF2zmT1XrPK0c-538lcFIAg/export?format=csv&gid=1688370404",
"https://docs.google.com/spreadsheets/d/1ymCdsBHgamJis3_2PK0IZS_VZvls5Umn-ng9o4S0Sro/export?format=csv&gid=119942101",
"https://docs.google.com/spreadsheets/d/1AqUeUfzuAUG5pCLJGbrysCcTxICCrHsA8rRE1ffHZpU/export?format=csv&gid=1384942932",
"https://docs.google.com/spreadsheets/d/1zvfu9Nc27aGo2ovAn2kqofv-iVXO61-n-dQ4DIrpbUs/export?format=csv&gid=2105631292",
"https://docs.google.com/spreadsheets/d/1XAXIQeLrgxgEx6CCHxWOmfgmInRxwCxu-4vw1PR8dp8/export?format=csv&gid=1064535711",
"https://docs.google.com/spreadsheets/d/1xad1XOm-J4T13rYFJ7BBeskaaCV88BAQZExqNX3aX0w/export?format=csv&gid=95139586",
"https://docs.google.com/spreadsheets/d/1pBj5q5NkzpZUKeOqFZCGb0RdzV1mvS8rw2iXWcwYKbc/export?format=csv&gid=616415653",
"https://docs.google.com/spreadsheets/d/1dZBbXEKSRqhR8Yy9_zFBAjzdw_Gyu3czYT0GNcGhrZI/export?format=csv&gid=687388009",
"https://docs.google.com/spreadsheets/d/1pReietRze4QKpMOM6dbJeJ4bdekcGgrxcaC5k4Qv8C8/export?format=csv&gid=2108461693",
"https://docs.google.com/spreadsheets/d/1KkqQ8U4vL5Tjy7Cq16FQD7xi3qnfPnyYiMYQuiJ6Hkc/export?format=csv&gid=957537605"]
dfs =[]
for url in urls:
df = pd.read_csv(url, encoding='latin-1')[1:]
dfs.append(df)
pop65_conc = pd.concat(dfs)
col_use = ["date", "HC01_EST_VC01", "HC02_EST_VC01"]
pop65_conc = pop65_conc[col_use]
pop65_conc["HC01_EST_VC01"]=pop65_conc["HC01_EST_VC01"].astype("int")
pop65_conc["HC02_EST_VC01"]=pop65_conc["HC02_EST_VC01"].astype("int")
pop65_conc["pop65_rate"] = pop65_conc["HC02_EST_VC01"]/pop65_conc["HC01_EST_VC01"]
pop65_conc["date"] =pd.to_datetime(pop65_conc["date"])
pop65_conc["year"] = pop65_conc["date"].dt.year
pop65_conc = pop65_conc.drop("date", axis=1)
pd.set_option('display.max_rows', 10000)
pop65_conc["year"] = pop65_conc["year"].astype("str")
step = 30
overlap = 0.5
alt.themes.enable('latimes')
chart = alt.Chart(pop65_conc, height=step, width=700).transform_joinaggregate(
mean_rate='mean(pop65_rate)', groupby=['year']
).transform_bin(
['bin_max', 'bin_min'], field='pop65_rate', bin=alt.BinParams(step=0.03)
).transform_aggregate(
value='count()', groupby=['year','mean_rate', 'bin_min', 'bin_max']
).transform_impute(
impute='value', groupby=['year','mean_rate'], key='bin_min', value=0
).mark_area(
interpolate='monotone',
fillOpacity=0.8,
stroke='lightgray',
strokeWidth=0.5
).encode(
alt.X('bin_min:Q', bin='binned', title='population rate of over 65',
axis=alt.Axis(title='Population rate of over age 65', ),
scale=alt.Scale(domain=(0.05, 0.30))),
alt.Y(
'value:Q',
scale=alt.Scale(range=[step, -step * overlap]),
axis= None
),
alt.Fill(
'mean_rate:Q',
legend=alt.Legend(title="Population rate of over age65"),
scale=alt.Scale(domain=[0.16, 0.11], scheme='inferno')
)
).facet(
row = alt.Row(
'year:N',
title="year",
header=alt.Header(titleFontSize=13, labelAngle=0, labelFontSize=12, labelAlign="left")
)
).properties(
title={
"text": ["US Population has been ageing clearly in last 10 years"],
"subtitle": ["Distribution of over65 population rate for each county"],
"color": "blue",
"subtitleColor": "black",
"subtitleFontSize":18,
"subtitleFontWeight":"bold"
},
bounds='flush'
).configure_facet(
spacing=0
).configure_view(
stroke=None
).configure_title(
anchor='end'
)
config = chart.configure_title(
fontSize=24,
fontWeight="bold",
anchor="middle"
).configure_legend(
titleColor='black',
titleFontSize=13,
titleFontWeight="bold",
orient='right',
fillColor="#ebf5ff",
padding=10
).configure_axisX(
titleFontWeight="bold",
labelFontSize=11,
grid=True,
gridDash=[2,3]
)
config
sc00_18 = pd.read_csv("sc00_18.csv")
sc00_18_wrkag = sc00_18[(sc00_18["AGE"]>=15) & (sc00_18["AGE"]<=64)]
sc00_18_ov65 = sc00_18[(sc00_18["AGE"]>=65) & (sc00_18["AGE"]!=999)]
sc00_18_wrkag_div = sc00_18_wrkag.groupby(["DIVISION"]).sum()
sc00_18_ov65_div = sc00_18_ov65.groupby(["DIVISION"]).sum()
sc00_18_wrkag_divt =sc00_18_wrkag_div.T[8:]
div_dic = {0:"USA", 1:"New England", 2:"Mid-Atlantic", 3:"East North Central", 4:"West North Central", \
5:"South Atlantic", 6:"East South Central", 7:"West South Central", 8:"Mountain", 9:"Pacific"}
div_list = []
for i in range(0, 10):
sc00_18_wrkag_divt_col = sc00_18_wrkag_divt[i].reset_index()
year = sc00_18_wrkag_divt_col["index"].apply(lambda x: x[6:10])
sc00_18_wrkag_divt_col["index"]=year
sc00_18_wrkag_divt_col = sc00_18_wrkag_divt_col.rename(columns={"index":"year", i:"pop"})
sc00_18_wrkag_divt_col["pct_chg"] = sc00_18_wrkag_divt_col["pop"].pct_change()
sc00_18_wrkag_divt_col["pct_chg"][0] = 0
sc00_18_wrkag_divt_col["pct_chg"] = sc00_18_wrkag_divt_col["pct_chg"]*100 +100
sc00_18_wrkag_divt_col
sc00_18_wrkag_divt_col["div"] = i
div = sc00_18_wrkag_divt_col["div"].apply(lambda x: div_dic[i])
sc00_18_wrkag_divt_col["div"] = div
div_list.append(sc00_18_wrkag_divt_col)
conc_div = pd.concat(div_list)
conc_div["a"] =100
alt.themes.enable('latimes')
box = pd.DataFrame({'x1': [2000], 'x2': [2018], 'y1': [0], 'y2': [100]})
domain = ["USA", "New England", "Mid-Atlantic","East North Central", "West North Central",\
"South Atlantic", "East South Central", "West South Central", "Mountain", "Pacific"]
range_ = ['red', 'blue', 'green','orange','pink','light-gray','light-gray','light-gray','light-gray','light-grey']
chart = alt.Chart(conc_div, width=800, height=400).mark_line().encode(
x=alt.X('year:O',
axis=alt.Axis(title="year")),
y = alt.Y('pct_chg:Q',
scale=alt.Scale(domain=(99, 103),),
axis=alt.Axis(title="percent_change(indexed to 100)")
),
color=alt.Color('div:N', scale=alt.Scale(domain=domain, range=range_),
legend=alt.Legend(title="Division",
values=["USA", "New England", "Mid-Atlantic","East North Central","West North Central",
"(light-grey)Other divisions"]))
).properties(
title={
"text": ["Working-age population starts declining in north-east areas"],
"subtitle": ["The change rate of working-age population for each division"],
"color": "blue",
"subtitleColor": "black",
"subtitleFontSize":18,
"subtitleFontWeight":"bold"
}
)
box = alt.Chart(conc_div).mark_rect(fill='#75bbff', stroke='none',strokeOpacity=0, fillOpacity=0.03).encode(
alt.X('year:N'),
alt.Y('a')
)
rule = alt.Chart(conc_div).mark_rule(color='black', strokeDash=[2,3], size=2).encode(
y='a:Q'
)
config = (chart+box+rule).configure_title(
fontSize=24,
fontWeight="bold",
anchor="middle"
).configure_legend(
titleColor='black',
titleFontSize=12,
titleFontWeight="bold",
orient='right',
fillColor="#ebf5ff",
padding=10
).configure_axisX(
labelFontSize=10,
labelAngle=-45,
grid=True,
gridDash=[2,3],
titleFontWeight="bold"
).configure_axisY(
labelFontSize=11,
grid=True,
gridDash=[2,3],
titleFontWeight="bold"
)
config
sc00_18_rate = sc00_18_wrkag_div
for i in range(8, len(sc00_18_rate.columns)):
sc00_18_rate.iloc[:,i] = np.divide(sc00_18_rate.iloc[:,i], sc00_18_ov65_div.iloc[:, i])
div_dic = {0:"USA", 1:"New England", 2:"Mid-Atlantic", 3:"East North Central", 4:"West North Central", \
5:"South Atlantic", 6:"East South Central", 7:"West South Central", 8:"Mountain", 9:"Pacific"}
sc00_18_rate2000 = sc00_18_rate.iloc[:, 8].reset_index()
sc00_18_rate2000["DIVISION"] = sc00_18_rate2000["DIVISION"].apply(lambda x: div_dic[x])
sc00_18_rate2000 = sc00_18_rate2000.rename(columns={"DIVISION":"Division", "POPEST2000_CIV": "per_wk"})
sc00_18_rate2000["Division"][2] = "Middle Atlantic"
sc00_18_rate2018 = sc00_18_rate.iloc[:,26].reset_index()
sc00_18_rate2018["DIVISION"] = sc00_18_rate2018["DIVISION"].apply(lambda x: div_dic[x])
sc00_18_rate2018 = sc00_18_rate2018.rename(columns={"DIVISION":"Division", "POPEST2018_CIV": "per_wk"})
sc00_18_rate2018["Division"][2] = "Middle Atlantic"
sc00_18_rate2000_div = sc00_18_rate2000[1:]
sc00_18_rate2018_div = sc00_18_rate2018[1:]
div = gpd.read_file("cb_2018_us_division_500k.shp")
col_use=["NAME","geometry"]
div = div[col_use]
div = div.rename(columns={"NAME":"Division"})
gdf1 = pd.merge(div, sc00_18_rate2000_div)
div["lon_lat"]= div.centroid
div2=div.drop(["geometry"], axis=1)
div2 = div2.rename(columns={"lon_lat":"geometry"})
div2["longitude"]= div2.geometry.x
div2["latitude"]= div2.geometry.y
div3 = pd.merge(div2, sc00_18_rate2000_div)
div3["per_wk"] = div3["per_wk"]
div3["longitude"] = div3["longitude"]
div4 = pd.merge(div2, sc00_18_rate2018_div)
div4["per_wk"] = div4["per_wk"]
div4["longitude"] = div4["longitude"]
choro_json = json.loads(gdf1.to_json())
choro_data = alt.Data(values=choro_json['features'])
alt.themes.enable('latimes')
base = alt.Chart(choro_data).mark_geoshape(
fill="lightgrey",
stroke='white',
strokeWidth=1
).encode(
).project(
type='albersUsa'
).properties(
title={
"text": "Lesser younger generation against elderly people",
"subtitle": ["The number of 65 over population per working-age person in 2000 and 2018"],
"color": "blue",
"subtitleColor": "black",
"subtitleFontSize":18,
"subtitleFontWeight":"bold"
},width=700,
height=500
)
points1 = alt.Chart(div3).mark_circle(color="red", fillOpacity=0.8).encode(
latitude="latitude:Q",
longitude="longitude:Q",
size=alt.Size("per_wk:Q",scale=alt.Scale(domain=[3.5, 6.1], range=[0, 3000]),
legend=alt.Legend(title="working-age/65over(2000)")
),
tooltip=["Division:N", "per_wk:Q"]
)
points2 = alt.Chart(div4).mark_circle(color="blue", fillOpacity=0.8).encode(
latitude="latitude:Q",
longitude="longitude:Q",
size=alt.Size("per_wk:Q",scale=alt.Scale(domain=[3.5, 6.1], range=[0, 3000]),
legend=alt.Legend(title="working-age/65over(2018)")
),
tooltip=["Division:N", "per_wk:Q"]
)
points = (points1 + points2).resolve_scale(size='independent')
config = (base + points)
config = config.configure_title(
fontSize=24,
fontWeight="bold",
anchor="middle"
).configure_legend(
titleColor='black',
titleFontSize=12,
titleFontWeight="bold",
symbolType="circle",
orient='right',
fillColor="#ebf5ff",
padding=10
).configure_axisX(
labelFontSize=10,
labelAngle=-45
).configure_axisY(
labelFontSize=10)
config
aging = pd.read_csv("https://docs.google.com/spreadsheets/d/1ggeuLBBugr7yyctZ1qM_evCxKdBYqq_LaDRRRmklsCY/export?format=csv&gid=1073389986", encoding='latin-1')[1:]
use_cols = ["YEAR","HC01_EST_VC01","HC02_EST_VC01","HC02_EST_VC09","HC02_EST_VC10","HC02_EST_VC11",\
"HC02_EST_VC12", "HC02_EST_VC13", "HC02_EST_VC14", "HC02_EST_VC15", "HC02_EST_VC16", "HC02_EST_VC18"]
aging = aging[use_cols]
aging.columns = ["Year", "total_pop", "total_pop_65", "total_pop65_onerace", "White", "Black",\
"Native","Asian","hawai","other", "two_or_more", "Hispanic or Latino"]
aging["Year"]= aging["Year"].astype("int")
aging["White"] = aging["White"].astype("float")
aging["Black"] = aging["Black"].astype("float")
aging["Native"] = aging["Native"].astype("float")
aging["Asian"] = aging["Asian"].astype("float")
aging["hawai"] = aging["hawai"].astype("float")
aging["other"] = aging["other"].astype("float")
aging["two_or_more"] = aging["two_or_more"].astype("float")
aging["Hispanic or Latino"] = aging["Hispanic or Latino"].astype("float")
aging["Others"] = aging["hawai"] + aging["other"] + aging["two_or_more"]
aging_nonhis = aging.drop(["hawai", "other", "two_or_more","Hispanic or Latino"], axis=1)
aging_his = aging[["Year", "Hispanic or Latino"]]
aging_his["Race"] = "Hispanic or Latino"
white = aging_nonhis[["Year","White"]]
white["Race"]="White"
white = white.rename(columns={"White":"pop65"})
white = white.sort_values(by=["Year"])
black = aging_nonhis[["Year","Black"]]
black["Race"]="Black"
black = black.rename(columns={"Black":"pop65"})
black = black.sort_values(by=["Year"])
native = aging_nonhis[["Year","Native"]]
native["Race"]="Native"
native = native.rename(columns={"Native":"pop65"})
native = native.sort_values(by=["Year"])
asian = aging_nonhis[["Year","Asian"]]
asian["Race"]="Asian"
asian = asian.rename(columns={"Asian":"pop65"})
asian = asian.sort_values(by=["Year"])
other = aging_nonhis[["Year","Others"]]
other["Race"]="Others"
other = other.rename(columns={"Others":"pop65"})
other = other.sort_values(by=["Year"])
aging_conc_nonhis = pd.concat([white, black, native, asian, other])
col_domain = ["White", "Black", "Native", "Asian", "Others"]
col_range = ["yellow", "blue", "red", "green", "pink"]
chart1 = alt.Chart(aging_conc_nonhis).mark_area(fillOpacity=0.6).encode(
x="Year:N",
y=alt.Y("pop65:Q",
scale=alt.Scale(domain=[0,100]),
axis=alt.Axis(title="Population of people over65(%)")
),
color=alt.Color("Race:N",
scale= alt.Scale(domain=col_domain, range=col_range))
).properties(
width=500,
height=400,
title={
"text": "Race of elderly people is being gradually diverged",
"subtitle": ["Racial structure for people over 65 years old in US(2006-2017)"],
"subtitleColor": "black",
"subtitleFontSize":18,
"subtitleFontWeight":"bold"
}
)
chart2 = alt.Chart(aging_his).mark_area(fillOpacity=0.6).encode(
x="Year:N",
y="Hispanic or Latino:Q",
color=alt.Color("Race:N",
scale = alt.Scale(range=["orange"]),
legend=alt.Legend(title=None)))
chart = (chart1 + chart2).resolve_scale(color='independent')
config = chart.configure_title(
fontSize=24,
color="blue",
fontWeight="bold",
anchor="middle"
).configure_legend(
titleColor='black',
titleFontSize=13,
labelFontSize=11,
orient='right',
fillColor="#ebf5ff",
padding=10
).configure_axisX(
titleFontSize=13,
titleFontWeight="bold",
labelFontSize=11,
labelAngle=-45,
grid=True,
gridDash=[2,3]
).configure_axisY(
titleFontSize=13,
titleFontWeight="bold",
labelFontSize=11)
config
df = pd.read_csv("fertility_data2.csv")
alt.themes.enable('latimes')
base = alt.Chart(df).properties(
width=700,
height=300,
title={
"text": "Fertility rate tends to be higher in rural areas",
"subtitle": ["Fertility rate per 1000 women for each county"],
"subtitleColor": "black",
"subtitleFontSize":18,
"subtitleFontWeight":"bold"
}
)
color_scale = alt.Scale(domain=['Midwest', 'Northeast', 'South','West', 'nan'],
range=['#98df8a', '#ff7f00','#ffed6f','#8624F5','#9d755d'])
hists = base.mark_bar(opacity=0.6, thickness=100).encode(
x=alt.X('fert_13_17',
bin=alt.Bin(maxbins=140), # step keeps bin size the same
scale=alt.Scale(domain=[0,200]),
axis=alt.Axis(title='Fertility_rate per 1000 women')),
y=alt.Y('count()',
stack=None,
scale=alt.Scale(domain=[0,100])),
color=alt.Color('region:N',scale=color_scale)
).configure_title(
fontSize=24,
color="blue",
fontWeight="bold",
anchor="middle"
).configure_legend(
titleColor='black',
titleFontSize=12,
titleFontWeight="bold",
orient='right',
fillColor="#ebf5ff",
padding=10
).configure_axisX(
labelFontSize=10,
grid=False,
gridDash=[2,3],
titleFontWeight="bold"
).configure_axisY(
labelFontSize=11,
grid=True,
gridDash=[2,3],
titleFontWeight="bold"
)
hists
county = gpd.read_file("UScounties.shp")
county = county.rename(columns={"FIPS":"geoid"})
county["geoid"]= county["geoid"].astype(int)
county = county[["geoid", "geometry"]]
gdf_fert = county.merge(df, on="geoid")
choro_json = json.loads(gdf_fert.to_json())
choro_data = alt.Data(values=choro_json['features'])
def evaluateForBivariate(row, x_bl, x_bh, y_bl, y_bh):
colorMatrix = [["#fd8d3c", "#843c39", "#393b79"],
["#f1e2cc", "#cccccc", "#4c78a8"],
["#ffffcc", "#c6bdef", "#3182bd"],]
xBoundaries = [x_bl, x_bh]
xIdx = 0
if row["total_pop"] < xBoundaries[0]:
xIdx = 0
elif row["total_pop"] < xBoundaries[1]:
xIdx = 1
else:
xIdx = 2
yBoundaries = [y_bl, y_bh]
yIdx = 0
if row["fert_13_17"] < yBoundaries[0]:
yIdx = 2
elif row["fert_13_17"] < yBoundaries[1]:
yIdx = 1
else:
yIdx = 0
return colorMatrix[yIdx][xIdx];
pop_33 = np.percentile(df["total_pop"], 33.3)
pop_66 = np.percentile(df["total_pop"], 66.7)
fert_33 = np.percentile(df["fert_13_17"], 33.3)
fert_66 = np.percentile(df["fert_13_17"], 66.7)
color = []
for i in range(0, len(df)):
row = df.iloc[i,:]
color.append(evaluateForBivariate(row, pop_33, pop_66, fert_33, fert_66))
df["color"] = np.array(color)
county = gpd.read_file("UScounties.shp")
county = county.rename(columns={"FIPS":"geoid"})
county["geoid"]= county["geoid"].astype(int)
county = county[["geoid", "geometry"]]
gdf_fert = county.merge(df, on="geoid")
choro_json = json.loads(gdf_fert.to_json())
choro_data = alt.Data(values=choro_json['features'])
color_range = ["#fd8d3c", "#843c39", "#393b79","#f1e2cc", "#cccccc", "#4c78a8", "#ffffcc", "#c6bdef", "#3182bd"]
color_domain = ["#fd8d3c", "#843c39", "#393b79","#f1e2cc", "#cccccc", "#4c78a8", "#ffffcc", "#c6bdef", "#3182bd"]
alt.themes.enable('latimes')
base = alt.Chart(choro_data).mark_geoshape(
stroke='black',
strokeWidth=1
).encode(
).project(
type='albersUsa'
).properties(
title={
"text": "Fertility rate tends to be higher in rural areas",
"subtitle": ["Fertility rate per 1000 women for each county"],
"subtitleColor": "black",
"subtitleFontSize":18,
"subtitleFontWeight":"bold"
},width=700,
height=500
)
# Add Choropleth Layer
choro = alt.Chart(choro_data).mark_geoshape().encode(
color = alt.Color("properties.color:N",
scale = alt.Scale(domain=color_domain, range=color_range),
legend=None,
title = "Fertility rate per 1000 women"),
tooltip=['properties.county:O','properties.fert_13_17:Q']
).project(
type='albersUsa'
)
config = base+choro
# Legend
x = np.array(['<33 perct.', '<67 perct.', '>67 perct.', '<33 perct.', '<67 perct.', '>67 perct.', '<33 perct.', '<67 perct.', '>67 perct.'])
y = np.array(['>67 perct.', '>67 perct.', '>67 perct.', '<67 perct.', '<67 perct.', '<67 perct.', '<33 perct.', '<33 perct.', '<33 perct.'])
z = np.array([["#fd8d3c", "#843c39", "#393b79"],
["#f1e2cc", "#cccccc", "#4c78a8"],
["#ffffcc", "#c6bdef", "#3182bd"]], dtype=str)
source = pd.DataFrame({'x': x,
'y': y,
'z': z.ravel()})
legend = alt.Chart(source).mark_rect().encode(
x=alt.X('x:N',
axis=alt.Axis(title="Population",
labelFontSize=10,
labelAngle=0)),
y=alt.Y('y:N',
axis=alt.Axis(title="Fertility rate/1000 women",
labelFontSize=10,
labelAngle=-90)),
color=alt.Color("z:N", scale=alt.Scale(domain=color_domain, range=color_range,),
legend=None)
).properties(width=200, height=150)
config = alt.hconcat(config, legend)
config = config.configure_title(
color='blue',
fontSize=24,
fontWeight="bold",
anchor="middle"
).configure_axisX(
labelFontSize=11
).configure_axisY(
labelFontSize=11)
config
df = df.iloc[:,2:]
df["above_med_wh"] = np.where(df["white_pct"]>= np.median(df["white_pct"]), 1, 0)
county["lon_lat"]= county.centroid
county = county[["geoid", "lon_lat"]]
county = county.rename(columns={"lon_lat":"geometry"})
gdf_pts = county.merge(df, on="geoid")
gdf_pts = gdf_pts[gdf_pts["state"] != "Alaska"]
gdf_pts = gdf_pts[gdf_pts["state"] != "Hawaii"]
gdf_ab_wh = gdf_pts[gdf_pts["above_med_wh"]==1]
gdf_bl_wh = gdf_pts[gdf_pts["above_med_wh"]==0]
gdf_ab_wh = gpd.GeoDataFrame(gdf_ab_wh)
gdf_bl_wh = gpd.GeoDataFrame(gdf_bl_wh)
gdf_ab_wh["region"] = gdf_ab_wh["region"].astype(str)
gdf_bl_wh["region"] = gdf_bl_wh["region"].astype(str)
fert_25 = np.percentile(df["fert_13_17"], 25)
fert_50 = np.percentile(df["fert_13_17"], 50)
fert_75 = np.percentile(df["fert_13_17"], 75)
perc_ab = []
perc_bl = []
for i in range(0, len(gdf_ab_wh)):
row = gdf_ab_wh.iloc[i,:]
if row["fert_13_17"] < fert_25:
perc_ab.append("<25 percentile")
elif row["fert_13_17"] < fert_50:
perc_ab.append("<50 percentile")
elif row["fert_13_17"] < fert_75:
perc_ab.append("<75 percentile")
else:
perc_ab.append(">=75 percentile")
for i in range(0, len(gdf_bl_wh)):
row = gdf_bl_wh.iloc[i,:]
if row["fert_13_17"] < fert_25:
perc_bl.append("<25 percentile")
elif row["fert_13_17"] < fert_50:
perc_bl.append("<50 percentile")
elif row["fert_13_17"] < fert_75:
perc_bl.append("<75 percentile")
else:
perc_bl.append(">=75 percentile")
gdf_ab_wh["perc"] = perc_ab
gdf_bl_wh["perc"] = perc_bl
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa'))
fig = plt.figure(figsize=(20,15))
plt.title("Fertility rate for counties with high/low percentage of white people", fontsize=18)
st = fig.suptitle("Racial diversity might affect fertility", fontsize=24,
fontweight="bold", color="blue")
proj = projection=gcrs.AlbersEqualArea(central_latitude=45.7128, central_longitude=-100.0059)
ax1 = plt.subplot(211, projection=proj)
ax2 = plt.subplot(212, projection=proj)
ax1 = gplt.pointplot(gdf_ab_wh, projection=proj, scale="fert_13_17", hue='perc',
cmap='inferno_r',limits=(1, 20), legend=True, legend_var='hue',
edgecolor='white', linewidth=0.5, ax=ax1)
ax2 = gplt.pointplot(gdf_bl_wh, projection=proj, scale="fert_13_17", hue='perc',
cmap='inferno_r',limits=(1, 20),legend=True, legend_var="scale",
edgecolor='white', linewidth=0.5, ax=ax2)
gplt.polyplot(contiguous_usa,edgecolor='white', facecolor='lightgray',
ax=ax1)
ax1.set_title("Fertility rate for county with high pct of white, 2013-2017",
fontdict={'fontsize': 16})
gplt.polyplot(contiguous_usa,edgecolor='white', facecolor='lightgray',
ax=ax2)
ax2.set_title("Fertility rate for county with low pct of white, 2013-2017",
fontdict={'fontsize': 16})
fig.tight_layout()
a1 = alt.Chart(df).properties(
width=300,
height=200,
title={
"text": ["Unemployment rate & poverty rate"],
"color": "black",
"fontSize":13
}
).mark_rect().encode(
alt.X('Unemployment_rate_2018:Q', bin=alt.Bin(maxbins=60),
axis=alt.Axis(title='Unemployment rate')),
alt.Y('poverty_rate:Q', bin=alt.Bin(maxbins=40),
axis=alt.Axis(title='Poverty rate')),
alt.Color("fert_13_17:Q", scale=alt.Scale(domain =[0, 150], scheme='inferno'),
legend = alt.Legend(title="Fertility rate/1000 women"))
)
a2 = alt.Chart(df).properties(
width=300,
height=200,
title={
"text": ["Non-high school degree rate & poverty rate"],
"color": "black",
"fontSize":13
}
).mark_rect().encode(
alt.X('pct_bl_hs:Q', bin=alt.Bin(maxbins=60),
axis=alt.Axis(title='Non high school rate')),
alt.Y('poverty_rate:Q', bin=alt.Bin(maxbins=40),
axis=alt.Axis(title='Poverty rate')),
alt.Color("fert_13_17:Q", scale=alt.Scale(domain =[0, 150], scheme='inferno'))
)
a3 = alt.Chart(df).properties(
width=300,
height=200,
title={
"text": ["Bachelor or higher rate & poverty rate"],
"color": "black",
"fontSize":13
}
).mark_rect().encode(
alt.X('pct_bach_high:Q', bin=alt.Bin(maxbins=60),
axis=alt.Axis(title='Bachelor or higher degree rate')),
alt.Y('poverty_rate:Q', bin=alt.Bin(maxbins=40),
axis=alt.Axis(title='Poverty rate')),
alt.Color("fert_13_17:Q", scale=alt.Scale(domain =[0, 150], scheme='inferno'))
)
a4 = alt.Chart(df).properties(
width=300,
height=200,
title={
"text": ["Crime rate & poverty rate"],
"color": "black",
"fontSize":13
}
).mark_rect().encode(
alt.X('crime_rate:Q', bin=alt.Bin(maxbins=60),
axis=alt.Axis(title='Crime rate per person')),
alt.Y('poverty_rate:Q', bin=alt.Bin(maxbins=40),
axis=alt.Axis(title='Poverty rate')),
alt.Color("fert_13_17:Q", scale=alt.Scale(domain =[0, 200], scheme='inferno'))
)
chart1 = a1|a2
chart2 = a3|a4
chart = alt.VConcatChart(
vconcat=[chart1, chart2],
title={
"text": "Fertility rate would be correlated with some socioeconomic factors",
"subtitle": ["The correlation between fertility rate and four socioeconomic factors for county level"],
"subtitleColor": "black",
"subtitleFontSize":18,
"subtitleFontWeight":"bold"
}
)
chart.configure_title(
fontSize=24,
color="blue",
fontWeight="bold",
anchor="middle"
).configure_legend(
titleColor='black',
titleFontSize=11,
titleFontWeight="bold",
orient='right',
fillColor="#ebf5ff",
padding=10
).configure_axisX(
labelFontSize=11
).configure_axisY(
labelFontSize=11)